library(foreign)

#Randomization inference (requires loading the ri package)
r_inference <- function(some_data, dv, tr = 'cus_sr', clustvar = 'ccode', maxiter = 100000){
  perms <- genperms(some_data[[tr]], maxiter = maxiter, clustvar = some_data[[clustvar]]) # all possible permutations
  probs <- genprobexact(some_data[[tr]], clustvar = some_data[[clustvar]]) # probability of treatment
  ate <- estate(some_data[[dv]], some_data[[tr]], prob = probs)
  Ys <- genouts(some_data[[dv]], some_data[[tr]])
  distout <- gendist(Ys, perms, prob=probs) # generate sampling dist. under sharp null
  pvals <- dispdist(distout, ate)
  return(pvals)
}

#Bootstrap Cox model
coxph_boot <- function(the_data, frmla, indices){
  dt <- the_data[indices, ]
  coxph.out <- coxph(as.formula(frmla), data = dt)
  return(coxph.out$coef)
}

#Display numeric objects as strings
deci <- function(x, k, center = TRUE) {
  a <- format(round(x, k), nsmall=k)
  return(a)
}

#Estimate Cox model using multiply imputed data
ameliaCoxEstimate <- function(ameliaObject, formul, DV_Label = 'y', fail_var = 'cus_fail', id_var = 'cus_caseid', Bootstrap = F){
  #ameliaObject must the the output from multiple imputation 
  beta <- NULL
  se <- NULL
  ph.test <- NULL
  M <- ameliaObject[['m']]
  beta_variance <- list()
  covarianceMatrices <- list()
  survObject <- list()
  for (i in 1:ameliaObject[['m']]){
    print (i)
    dat <- ameliaObject[['imputations']][[i]]
    caseids <- as.character(dat[[id_var]])
    dat$id <- 1:nrow(dat)
    model <- coxph(update(formul, .~.+cluster(id)),#the cluster term gives heteroskedasticity consistent standard errors
                     data = dat,
                     method = c("efron"),
                     x = TRUE,
                     y = TRUE,
                     model = TRUE)
    censInd <- model$y[, 'status']
    var_names <-  names(model$coef)
    new_beta <- model$coef
    beta <- rbind(beta, model$coef[var_names])
    if (Bootstrap == F){
      new_se <- sqrt(diag(model$var)); names(new_se) <- var_names
      }else{
      bootstrap.out <- boot(dat, coxph_boot, R = 100000, frmla = formul)
      new_se <- apply(bootstrap.out$t, 2, sd); names(new_se) <- var_names
    }
    se <- rbind(se, new_se)
    beta_variance[[i]] <- list(beta = new_beta, var = new_se)
    ph.test <- rbind( ph.test, cox.zph(model)[[1]][,3])
    colnames(model$var) <- names(model$coef)
    rownames(model$var) <- names(model$coef)
    covarianceMatrices[[i]] <- model$var
    survObject[[i]] <- model$y
  }

  colnames(beta) <- names(model$coef)
  colnames(se) <- names(model$coef); rownames(se) <- NULL
  
  combined.results <- list()
  beta.out <- as.vector( mi.meld(q = beta, se = se)	$ q.mi); names(beta.out) <- names(model$coef)
  se.out <- as.vector(mi.meld(q = beta, se = se)	$ se.mi); names(se.out) <- names(model$coef)

  output <- list()
  output[['summary']] <- data.frame(beta = beta.out, 
                                              se = se.out,
                                          z = beta.out/se.out,
                                          p = pnorm(-abs(beta.out/se.out)) * 2
                                              )
  output[['beta']] <- beta.out
  output[['se']] <- se.out
  output[['qhat']] <- beta
  output[['N']] <- model$n
  output[['nevents']] <- model$nevent
  output[['ph.test']] <- apply(ph.test, 2, mean)
  output[['X']] <- model$x
  output[['formul']] <- formul
  output[['mi']] <- 'Yes'
  output[['imputed.data']] <- ameliaObject$imputations
  
  #Omit strata from output
  omitStrata <- grep("strata", attr( model$terms, "term.labels") )
  strata <- attr( model$terms, "term.labels")[omitStrata]
  if (length(strata) > 0){
    addStrata <- c(unique(do.call(rbind, lapply(strsplit(strata, '\\('), FUN = function(x) strsplit(x[2], ')') ))))
  } else{
    addStrata <- ''
  }
  thedata <- dat[(1:nrow(model$y)), ]
  thedata[[DV_Label]] <- model$y[,'status']
  new_form <- formula(paste(DV_Label, paste(colnames(model$x), collapse='+'), sep = ' ~ '))
  ReferenceModel <- lm(new_form, data = thedata)
  output[['ReferenceModel']] <- ReferenceModel
  output[['Strata']] <- unlist(addStrata)
  output[['survObject']] <- survObject
  return(output)
}

CoxmodelList <- function(...){
  #input is several models
  linearModels <- list()
  coefs <- list()
  se <- list()
  multipleImputation <- list()
  loglik <- list()
  N <- list()
  Frailties <- list()
  Strata <- list()
  Nevents <- list()
  models.imp <- list()
  for (model in list(...)){
    linearModels <- c(linearModels, list(model$ReferenceModel))
    thecoefs <- model$beta
    coefs <- c(coefs, list(model$beta))
    se <- c(se, list(model$se))
    multipleImputation <- c(multipleImputation, list(model$mi))
    loglik <- c(loglik, list(model$loglik))
    N <- c(N, model$N)
    Nevents <- c(Nevents, model$nevents)
    Strata <- c(Strata, list(model$Strata))
    models.imp <- c(models.imp, list(model))
  }
  allStrata <- unique(do.call('c', unlist(list(Strata), recursive=FALSE)))
  allStrata <- allStrata[allStrata != '']
  if (length(allStrata) > 0 ){
    returnallStrata <- list()
    for (i in 1:length(allStrata)){
      returnallStrata[[allStrata[[i]]]] <- NULL
      for (model in list(...)){
        if (allStrata[i] %in% model$Strata){
          returnallStrata[[allStrata[[i]]]] <- c(returnallStrata[[allStrata[[i]]]], 'Yes')
        } else {returnallStrata[[allStrata[[i]]]] <- c(returnallStrata[[allStrata[[i]]]], 'No')}
      }
    }
  }else (returnallStrata <- NULL)

  return(list(linearModels = linearModels,
              coefs = coefs,
              se = se,
              mi = multipleImputation,
              N = N,
              Nevents = Nevents,
              Strata = returnallStrata,
              models = models.imp
  ))
}

#Create table of results from ameliaCoxEstimate
CoxTable <- function(modelListObject, dict, ref.Models = NULL, strat.custom = NULL, dep.var.caption = '', texLabel = '', column.labels = NULL, column.separate = NULL, omittedvars = NULL, title = '', notes = '', the_dep.var.labels = NULL, tableOrder = NULL, dep.var.labels.include = FALSE, dep.var.labels = NULL, align = FALSE, notes.align = 'r', float = TRUE, font.size = NULL, type.in = 'latex'){
  #ref.Models is a list of same length as the number of models
  if (!(is.null(ref.Models))){
    if (length(ref.Models) != length(modelListObject$linearModels)){
      stop('Fitted and compared models must be of same length')
    }
  }
  mis <- unlist(modelListObject[['mi']])
  N <- unlist(modelListObject[['N']])
  Nevents <- unlist(modelListObject[['Nevents']])
  Strata <- unlist(modelListObject[['Strata']])
  add.lines <- NULL
  if (is.null(strat.custom)){
    StrataWrite <- list()
    StrataWrite[[1]] <- c('Stratified by geographic region', 'No', "No", 'Yes', 'No',  'No', "No", 'No', "No")
    StrataWrite[[2]] <- c('Stratified by event number', 'No', 'No', 'No', "Yes",  "No", "Yes", "Yes", "Yes")
  }else{
    StrataWrite <- list()
    StrataWrite[[1]] <- strat.custom[[1]]
    StrataWrite[[2]] <- strat.custom[[2]]
  }
  
  add.lines <- c(add.lines, StrataWrite)
  model.dtls <-list(
    c('N observations', N),
    c('N breakdowns', Nevents),
    c('Multiple imputation', mis)
  )
  
  if (!(is.null(ref.Models))){
    pvalues <- '$p$-value of significance test: '
    for (i in 1:length(modelListObject$models)){
      if (!is.null(ref.Models)){
        pvalues <- c(pvalues, deci(coxLRtest(fit1 = modelListObject$models[[i]], fit0 = ref.Models[[i]]), 3))
      }else{
        pvalues <- c(pvalues, '')
      }
    }
    model.dtls[[length(model.dtls) + 1]] <- pvalues
  }
  
  starGaz <- stargazer(modelListObject$linearModels,
                       coef = modelListObject$coefs,
                       se = modelListObject$se,
                       column.labels = column.labels,
                       column.separate = column.separate,
                       dep.var.caption = dep.var.caption,
                       title =  title ,
                       omit = c('Constant', omittedvars, 'spline', 'factor'),
                       notes = notes,
                       omit.stat = 'all',
                       order = tableOrder,
                       dep.var.labels = the_dep.var.labels,
                       dep.var.labels.include = dep.var.labels.include,
                       notes.align = notes.align,
                       align = align,
                       label = texLabel,
                       float = float,
                       font.size = font.size,
                       type = type.in,
                       add.lines = c(add.lines,
                                     #list(c('Frailties, Country', Frailties)),
                                     model.dtls
                       )
  )
  for (entry in dict){
    starGaz <- gsub(entry, names(dict)[dict == entry], starGaz)
  }
  starGaz <- gsub('Num. Obs.', '\\\\\\\\ \\\\hline \\\\\\\\ Num. Obs.', starGaz)
  return(starGaz)
}


###################################
#Synthetic control method
###################################
#Functions to perform the synthetic control method

#Remove missing observation before using the dataprep function from the Synth package 
balance_dataset <- function(d, pred_vars, dep_var, include_1_4 = T, include_9_12 = T){
  new_d <- NULL
  bad_cases <- c()
  vars <- c(
    pred_vars,
    dep_var
  )
  for (i in 1:nrow(d)){
    new_row <- c()
    new_row['cus_sr'] <- as.character(d[i, 'cus_sr'])
    new_row['ccode'] <- as.character(d[i, 'ccode'])
    new_row['Country'] <- as.character(d[i, 'Country'])
    new_row['cus_t_surv'] <- as.integer(as.character(d[i, 'cus_t_surv']))
    new_row['cus_caseid'] <- as.character(d[i, 'cus_caseid'])
    
    if (include_9_12==T){
      count_missing <- matrix(data = 0, nrow = 4, ncol = length(vars))
      colnames(count_missing) <- vars
      j <- 1
      for (t in 13:10){
        new_row['Year'] <- as.integer(d[i, 'Year']) - t
        new_row['t'] <- -t+1
        for (the_var in vars){
          lagval <- paste(the_var, paste('L', t, sep = ''), sep = '')
          new_row[the_var] <- as.numeric(as.character(d[i, lagval]))
          if (is.na(new_row[the_var]) ){
            count_missing[j, the_var] <- 1
          }
        }
        j <- j+1
        new_d <- rbind(new_d, new_row)
        lagdepval <- paste(dep_var, paste('L', t, sep = ''), sep = '')
        if (is.na(d[i, lagdepval])){bad_cases <- c(bad_cases, new_row['cus_caseid'])}
      }
      if(max(apply(count_missing, 2, sum)) == 4){bad_cases <- c(bad_cases, new_row['cus_caseid'])}
    }
    
    count_missing <- matrix(data = 0, nrow = 4, ncol = length(vars))
    colnames(count_missing) <- vars
    j <- 1
    for (t in 9:6){
      new_row['Year'] <- as.integer(d[i, 'Year']) - t
      new_row['t'] <- -t+1
      for (the_var in vars){
        lagval <- paste(the_var, paste('L', t, sep = ''), sep = '')
        new_row[the_var] <- as.numeric(as.character(d[i, lagval]))
        if (is.na(new_row[the_var]) ){
          count_missing[j, the_var] <- 1
        }
      }
      j <- j+1
      new_d <- rbind(new_d, new_row)
      lagdepval <- paste(dep_var, paste('L', t, sep = ''), sep = '')
      if (is.na(d[i, lagdepval])){bad_cases <- c(bad_cases, new_row['cus_caseid'])}
    }
    if(max(apply(count_missing, 2, sum)) == 4){bad_cases <- c(bad_cases, new_row['cus_caseid'])}
    
    if (include_1_4 ==T){
      count_missing <- matrix(data = 0, nrow = 4, ncol = length(vars))
      colnames(count_missing) <- vars
      j <- 1
      for (t in 5:2){
        new_row['Year'] <- as.integer(d[i, 'Year']) - t
        new_row['t'] <- -t+1
        for (the_var in vars){
          lagval <- paste(the_var, paste('L', t, sep = ''), sep = '')
          new_row[the_var] <- as.numeric(as.character(d[i, lagval]))
          if (is.na(new_row[the_var]) ){
            count_missing[j, the_var] <- 1
          }
        }
        j <- j+1
        new_d <- rbind(new_d, new_row)
        lagdepval <- paste(dep_var, paste('L', t, sep = ''), sep = '')
        if (is.na(d[i, lagdepval])){bad_cases <- c(bad_cases, new_row['cus_caseid'])}
      }
      if(max(apply(count_missing, 2, sum)) == 4){bad_cases <- c(bad_cases, new_row['cus_caseid'])}
    }
    
    for (the_var in vars){
      new_row['Year'] <- as.integer(d[i, 'Year'])-1
      new_row['t'] <- 0
      lagval <- paste(the_var, 'L1', sep = '')
      new_row[the_var] <- as.numeric(as.character(d[i, lagval]))
      if (is.na(d[i, lagval])){bad_cases <- c(bad_cases, new_row['cus_caseid'])}
    }
    new_d <- rbind(new_d, new_row)
    for (the_var in vars){
      new_row['Year'] <- as.integer(d[i, 'Year'])
      new_row['t'] <- 1
      lagval <- the_var
      new_row[the_var] <- as.numeric(as.character(d[i, lagval]))
      if (is.na(d[i, lagval])){bad_cases <- c(bad_cases, new_row['cus_caseid'])}
    }
    new_d <- rbind(new_d, new_row)
    for (t in 2:10){
      new_row['Year'] <- as.integer(d[i, 'Year']) + t
      new_row['t'] <- t
      for (the_var in vars){
        leadval <- paste(the_var, paste('Lead', t, sep = ''), sep = '')
        new_row[the_var] <- as.numeric(as.character(d[i, leadval]))
        #if (is.na(new_row[the_var]) ){bad_cases <- c(bad_cases, new_row['cus_caseid'])}
      }
      leaddepval <- paste(dep_var, paste('Lead', t, sep = ''), sep = '')
      if (is.na(d[i, leaddepval])){bad_cases <- c(bad_cases, new_row['cus_caseid'])}
      new_d <- rbind(new_d, new_row)
    }
  }
  names(bad_cases) <- NULL
  bad_cases <- unique(bad_cases)
  new_d <- as.data.frame(new_d, stringsAsFactors = FALSE)
  rownames(new_d) <- 1:nrow(new_d)
  names(new_d) <- colnames(new_d)
  new_d[,dep_var] <- as.numeric(as.character(new_d[,dep_var]))
  for (the_var in pred_vars){
    new_d[,the_var] <-   as.numeric(as.character(new_d[, the_var]))
  }
  new_d$ccode <- as.integer(as.character(new_d$ccode))
  new_d$t <-as.integer(as.character(new_d$t))
  #bad_cases <- bad_cases[!(bad_cases %in% 'China 1949-')]
  new_d <- new_d[!(new_d$cus_caseid %in% bad_cases),]
  
  new_d$case_num <- as.numeric(as.factor(as.character(new_d$cus_caseid)))
  new_d$cus_caseid <- as.character(new_d$cus_caseid)
  new_d$cus_t_surv <- as.integer(new_d$cus_t_surv)
  new_d$cus_sr <- as.integer(new_d$cus_sr)
  return(list(d = new_d, bad_cases = bad_cases))
}

#Calculate the root mean square prediction error over the pre-treatment period
pre_RMSPE <- function(synth.res, dataprep.res, transform = F){
  if (transform == F){
    control <- dataprep.res$Y0plot %*% synth.res$solution.w
    treat <- dataprep.res$Y1plot
  }else{
    control <- exp(dataprep.res$Y0plot %*% synth.res$solution.w)
    treat <- exp(dataprep.res$Y1plot)
  }
  pre_t <- which(as.integer(rownames(treat)) < 0)
  pre_RMSPE <- sqrt(mean((treat[pre_t]-control[pre_t])^2))
  return(pre_RMSPE)
}

#Calculate the gap in outcome between the real and synthetic unit over the post-treatment period
post_Gap <- function(synth.res, dataprep.res, maxT = 10, transform = F){
  if (transform == F){
    control <- dataprep.res$Y0plot %*% synth.res$solution.w
    treat <- dataprep.res$Y1plot
  }else{
    control <- exp(dataprep.res$Y0plot %*% synth.res$solution.w)
    treat <- exp(dataprep.res$Y1plot)
  }
  post_t <- which(as.integer(rownames(treat)) > 0&as.integer(rownames(treat))<=maxT)
  post_gap <- mean(treat[post_t]-control[post_t])
  return(post_gap)
}

path.plot.cus <- function (synth.res = NA, dataprep.res = NA, tr.intake = NA,
                           Ylab = c("Y Axis"), Xlab = c("Year"), Ylim = NA, Legend = c("Treated", "Synthetic"), Legend.position = c("topright"), Main = NA,
                           transform = FALSE){
  if (transform == FALSE){
    y0plot1 <- dataprep.res$Y0plot %*% synth.res$solution.w
    Y1plot <- dataprep.res$Y1plot
  }else{
    y0plot1 <- exp(dataprep.res$Y0plot %*% synth.res$solution.w)
    Y1plot <- exp(dataprep.res$Y1plot)
  }
  if (sum(is.na(Ylim)) > 0) {
    Y.max <- max(c(y0plot1, dataprep.res$Y1plot))
    Y.min <- min(c(y0plot1, dataprep.res$Y1plot))
    Ylim <- c((Y.min - 0.3 * Y.min), (0.3 * Y.max + Y.max))
  }
  plot(dataprep.res$tag$time.plot, Y1plot ,
       t = "l", col = "black", lwd = 2, main = Main, ylab = Ylab,
       xlab = Xlab, xaxs = "i", yaxs = "i", ylim = Ylim, xaxt="n")
  axis(1, at = c(-10, -5, 0, 5, 10), labels = c(tr.intake-10, tr.intake-5, tr.intake, tr.intake+5, tr.intake+10))
  lines(dataprep.res$tag$time.plot, y0plot1, col = "black",
        lty = "dashed", lwd = 2, cex = 4/5)
  abline(v = tr.intake, lty = 3, col = "black", lwd = 2)
  if (sum(is.na(Legend)) == 0) {
    legend(Legend.position, legend = Legend, lty = 1:2, col = c("black",
                                                                "black"), lwd = c(2, 2), cex = 6/7)
  }
}


###################################
#Dictionaries for renaming variables
###################################
dictSummstatsL2 <- c('InstalledCom', 'sv[\\\\]_comm', 'ln.oil[\\\\]_gas[\\\\]_valuePOP[\\\\]_2014L2', 'ln.tpopL2', 'polity2L2', "e[\\\\]_migdppclnL2", "e[\\\\]_migdpgroL2", "e_migdpgroL2", "e_migdppclnL2", 'e_regionpol', 'evnum', 'mon', 'mil', 'partB', 'milB', 'persB', 'cus[\\\\]_sr')
names(dictSummstatsL2) <- c('Foreign installed communist', 'Communist', 'Oil and gas prod. per cap. (log)', 'Population (log)', 'Polity', 'Per Capita GDP (log)', 'GDP Growth (pct.)', 'GDP Growth', 'Per Cap. GDP (log)', 'Region', 'N', 'Monarchy', 'Military', 'Party', 'Military', 'Personalist', 'Revolution')

dictSummstatsL1 <- c('InstalledCom', 'sv[\\\\]_comm', 'ln.oil[\\\\]_gas[\\\\]_valuePOP[\\\\]_2014L1', 'ln.tpopL1', 'polity2L1', "e[\\\\]_migdppclnL1", "e[\\\\]_migdpgroL1", "e_migdpgroL1", "e_migdppclnL1", 'e_regionpol', 'evnum', 'mon', 'mil', 'partB', 'milB', 'persB', 'cus[\\\\]_sr', 'e[\\\\]_migdppclnL4')
names(dictSummstatsL1) <- c('Foreign installed communist', 'Communist', 'Oil and gas prod. per cap. (log)', 'Population (log)', 'Polity', 'Per Cap. GDP (log)', 'GDP Growth', 'GDP Growth (pct.)', 'Per Cap. GDP (log)', 'Region', 'N', 'Monarchy', 'Military', 'Party', 'Military', 'Personalist', 'Revolution', 'Ln(GDP per Capita)')

dictSummstats <- c(dictSummstatsL1, dictSummstatsL2)
dictSummstats <- c(dictSummstats, 'Oil, Gas Production' = 'ln.oil_gas_valuePOP_2014L2', 'Oil, Gas Production' = 'ln.oil_gas_valuePOP_2014L1')

